source("custom_functions.R")

Data Import

Importing the ASV, taxa, and metadata tables for both the Czech and Norwegian samples.

Czech

path <- "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Norway

path <- "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))

Merging

TODO: add read-count statistics.

Merging the two countries' data sets separately for each intestinal segment (terminal ileum, colon).
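
merging_data() lives in custom_functions.R. As a rough sketch of the presumed behavior (an assumption, since the helper is not shown here): subset both cohorts to one segment, join the ASV tables on the ASV identifier, and resolve conflicting taxonomy by keeping the assignment with fewer unclassified ranks.

# Hypothetical sketch of the merge step inside merging_data()
merge_asv_tables_sketch <- function(asv1, asv2, taxa1, taxa2) {
  asv <- merge(asv1, asv2, by = "ASV", all = TRUE)  # assumes an "ASV" id column
  asv[is.na(asv)] <- 0                              # ASV absent in a cohort = zero counts
  taxa <- rbind(taxa1, taxa2)
  n_na <- rowSums(is.na(taxa))                      # unclassified ranks per assignment
  taxa <- taxa[order(n_na), ]                       # better-annotated rows first
  taxa <- taxa[!duplicated(taxa$ASV), ]             # keep the best assignment per ASV
  list(asv, taxa)
}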

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="Q1")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="Q1")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

Data Analysis - Terminal ileum

segment="terminal_ileum"

Machine learning

path <- "../results/Q1/models_overfitting_check"

ElasticNet

model="enet"

ASV level

level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
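
For reference, usage="ml_clr" suggests binomial_prep() applies a centered log-ratio (CLR) transform to the filtered counts before modelling. A minimal sketch of that transform (an assumption about the helper; the real one is in custom_functions.R):

# Minimal CLR sketch, assuming samples are rows of a count table
clr_transform <- function(counts, pseudocount = 1) {
  logged <- log(counts + pseudocount)           # pseudocount avoids log(0)
  sweep(logged, 1, rowMeans(logged), "-")       # centre each sample on its mean log value
}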
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)
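
glmnet_binomial() presumably tunes the elastic-net mixing parameter alpha and the penalty lambda; a minimal sketch of that core with glmnet (assuming filt_ileum_uni_data is a data frame with a Group outcome column, which may not match the wrapper's actual structure):

# Hypothetical sketch of the tuning inside glmnet_binomial()
library(glmnet)
x <- as.matrix(filt_ileum_uni_data[, setdiff(names(filt_ileum_uni_data), "Group")])
y <- filt_ileum_uni_data$Group
fits <- lapply(seq(0, 1, by = 0.2), function(a)
  cv.glmnet(x, y, family = "binomial", alpha = a))   # cross-validate lambda per alpha
best <- fits[[which.min(sapply(fits, function(f) min(f$cvm)))]]
coef(best, s = "lambda.min")                         # inspect the selected betas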

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()

models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.4000000
lambda                          0.3016707
auc                             0.5000000
auc_czech                       0.5000000
auc_no                          0.5000000
auc_optimism_corrected          0.4906897
auc_optimism_corrected_CIL      0.4032509
auc_optimism_corrected_CIU      0.5612145
accuracy                        0.5104895
accuracy_czech                        NaN
accuracy_no                     0.4512195
accuracy_optimism_corrected     0.4670757
accuracy_optimism_corrected_CIL 0.3776515
accuracy_optimism_corrected_CIU 0.5437075
enet_model$conf_matrices
$original
   0  
0 73 0
1 70 0

$czech
   0  
0 36 0
1 25 0

$no
   0  
0 37 0
1 45 0
enet_model$plot

roc_c
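
The auc_optimism_corrected rows above appear to follow the bootstrap optimism idea (Harrell): apparent performance minus the average gap between a bootstrap model's performance on its own resample and on the original data. A minimal sketch with pROC (the wrapper's actual resampling, including the "atypboot" scheme, is defined in custom_functions.R, so fit_fun and predict_fun here are placeholders):

# Hypothetical optimism-correction sketch for AUC
library(pROC)
optimism_corrected_auc <- function(y, x, fit_fun, predict_fun, B = 100) {
  apparent <- auc(y, predict_fun(fit_fun(x, y), x))
  optimism <- replicate(B, {
    idx <- sample(seq_along(y), replace = TRUE)              # bootstrap resample
    boot_fit <- fit_fun(x[idx, , drop = FALSE], y[idx])
    auc(y[idx], predict_fun(boot_fit, x[idx, , drop = FALSE])) -
      auc(y, predict_fun(boot_fit, x))                       # optimism of this resample
  })
  as.numeric(apparent) - mean(optimism)
}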

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          92.0694517
auc                              0.9081781
auc_czech                        0.9133510
auc_no                           0.9038583
auc_optimism_corrected           0.4794630
auc_optimism_corrected_CIL       0.3607320
auc_optimism_corrected_CIU       0.5973844
accuracy                         0.6634615
accuracy_czech                         NaN
accuracy_no                      0.6219512
accuracy_optimism_corrected      0.5998475
accuracy_optimism_corrected_CIL  0.5258619
accuracy_optimism_corrected_CIU  0.6511905
enet_model$conf_matrices
$original
    0  
0 138 0
1  70 0

$czech
   0  
0 87 0
1 39 0

$no
   0  
0 51 0
1 31 0
enet_model$plot


roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                       [,1]
alpha                             0.0000000
lambda                          104.9051811
auc                               0.8153663
auc_czech                         0.8154040
auc_no                            0.7917638
auc_optimism_corrected            0.5463519
auc_optimism_corrected_CIL        0.4727740
auc_optimism_corrected_CIU        0.5936274
accuracy                          0.6540284
accuracy_czech                          NaN
accuracy_no                       0.5416667
accuracy_optimism_corrected       0.5973663
accuracy_optimism_corrected_CIL   0.4692958
accuracy_optimism_corrected_CIU   0.6834416
enet_model$conf_matrices
$original
    1  
0  73 0
1 138 0

$czech
   1  
0 40 0
1 99 0

$no
   1  
0 33 0
1 39 0
enet_model$plot


roc_c

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
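
aggregate_taxa() is another custom_functions.R helper; conceptually it should sum the counts of all ASVs that share a genus assignment. A sketch under that assumption (ASVs as rows, a Genus column in the taxa table):

# Hypothetical genus-aggregation sketch
aggregate_to_genus_sketch <- function(asv_tab, taxa_tab) {
  genus <- taxa_tab$Genus[match(rownames(asv_tab), rownames(taxa_tab))]
  genus[is.na(genus)] <- "unclassified"
  rowsum(asv_tab, group = genus)                # sum rows sharing a genus label
}
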
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                       [,1]
alpha                             0.0000000
lambda                          117.0690961
auc                               0.7978474
auc_czech                         0.8950216
auc_no                            0.7339286
auc_optimism_corrected            0.5261700
auc_optimism_corrected_CIL        0.4175710
auc_optimism_corrected_CIU        0.6201754
accuracy                          0.5104895
accuracy_czech                          NaN
accuracy_no                       0.4878049
accuracy_optimism_corrected       0.4905480
accuracy_optimism_corrected_CIL   0.4242232
accuracy_optimism_corrected_CIU   0.5501540
enet_model$conf_matrices
$original
   0  
0 73 0
1 70 0

$czech
   0  
0 33 0
1 28 0

$no
   0  
0 40 0
1 42 0
enet_model$plot


roc_c

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.8000000
lambda                          0.1118729
auc                             0.5000000
auc_czech                       0.5000000
auc_no                          0.5000000
auc_optimism_corrected          0.5630938
auc_optimism_corrected_CIL      0.4964636
auc_optimism_corrected_CIU      0.6150775
accuracy                        0.6634615
accuracy_czech                        NaN
accuracy_no                     0.6219512
accuracy_optimism_corrected     0.6072686
accuracy_optimism_corrected_CIL 0.5339850
accuracy_optimism_corrected_CIU 0.6505456
enet_model$conf_matrices
$original
    0  
0 138 0
1  70 0

$czech
   0  
0 87 0
1 39 0

$no
   0  
0 51 0
1 31 0
enet_model$plot


roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group,
                                     usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          96.2015279
auc                              0.7387334
auc_czech                        0.7366162
auc_no                           0.6876457
auc_optimism_corrected           0.6247644
auc_optimism_corrected_CIL       0.5590581
auc_optimism_corrected_CIU       0.6976382
accuracy                         0.6540284
accuracy_czech                         NaN
accuracy_no                      0.5416667
accuracy_optimism_corrected      0.6404057
accuracy_optimism_corrected_CIL  0.5724764
accuracy_optimism_corrected_CIU  0.6953307
enet_model$conf_matrices
$original
    1  
0  73 0
1 138 0

$czech
   1  
0 40 0
1 99 0

$no
   1  
0 33 0
1 39 0
enet_model$plot

roc_c

Saving results

models_summ_df_ileum <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_ileum, file.path(path, paste0("elastic_net_", segment, ".csv")))

Supplementary models

supplements_models <- list()

CLR-transformed data

kNN
model="knn"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               18.0000000
auc                              0.6246575
auc_optimism_corrected           0.4915453
auc_optimism_corrected_CIL       0.3685407
auc_optimism_corrected_CIU       0.5807214
accuracy                         0.5804196
accuracy_optimism_corrected      0.4580934
accuracy_optimism_corrected_CIL  0.3686731
accuracy_optimism_corrected_CIU  0.5555556
roc_c
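
knn_binomial() is a custom_functions.R wrapper; the tuned k in the summary suggests a caret-style grid search underneath. A sketch under that assumption (again treating filt_ileum_uni_data as a data frame with a Group factor, which may differ from the wrapper's actual input):

# Hypothetical sketch of the kNN tuning behind knn_binomial()
library(caret)
knn_sketch <- train(Group ~ ., data = filt_ileum_uni_data,
                    method = "knn",
                    tuneGrid = data.frame(k = 2:30),
                    trControl = trainControl(method = "cv", number = 5,
                                             classProbs = TRUE,
                                             summaryFunction = twoClassSummary),
                    metric = "ROC")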

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               29.0000000
auc                              0.6160455
auc_optimism_corrected           0.5418588
auc_optimism_corrected_CIL       0.4747331
auc_optimism_corrected_CIU       0.5945267
accuracy                         0.6586538
accuracy_optimism_corrected      0.6311507
accuracy_optimism_corrected_CIL  0.5332550
accuracy_optimism_corrected_CIU  0.7125676
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               29.0000000
auc                              0.5878003
auc_optimism_corrected           0.5187169
auc_optimism_corrected_CIL       0.3925478
auc_optimism_corrected_CIU       0.6139842
accuracy                         0.6635071
accuracy_optimism_corrected      0.5976041
accuracy_optimism_corrected_CIL  0.5291545
accuracy_optimism_corrected_CIU  0.6686495
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               20.0000000
auc                              0.6411937
auc_optimism_corrected           0.4397675
auc_optimism_corrected_CIL       0.3224986
auc_optimism_corrected_CIU       0.5695415
accuracy                         0.5384615
accuracy_optimism_corrected      0.4693208
accuracy_optimism_corrected_CIL  0.3540816
accuracy_optimism_corrected_CIU  0.5338255
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.5883540
auc_optimism_corrected           0.4775349
auc_optimism_corrected_CIL       0.4039989
auc_optimism_corrected_CIU       0.5404422
accuracy                         0.6634615
accuracy_optimism_corrected      0.6082219
accuracy_optimism_corrected_CIL  0.5222152
accuracy_optimism_corrected_CIU  0.7109578
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               28.0000000
auc                              0.6282509
auc_optimism_corrected           0.5030252
auc_optimism_corrected_CIL       0.3861708
auc_optimism_corrected_CIU       0.6083495
accuracy                         0.6682464
accuracy_optimism_corrected      0.5814859
accuracy_optimism_corrected_CIL  0.4716971
accuracy_optimism_corrected_CIU  0.6906985
roc_c

Random Forest
model="rf"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "61"       
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.4301035"
auc_optimism_corrected_CIL      "0.3408448"
auc_optimism_corrected_CIU      "0.5647091"
accuracy                        "1"        
accuracy_optimism_corrected     "0.4269609"
accuracy_optimism_corrected_CIL "0.3492692"
accuracy_optimism_corrected_CIU "0.5487759"
roc_c
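
The mtry / splitrule / min.node.size triple in the summary matches caret's tuning grid for method = "ranger", so rf_binomial() presumably wraps something like the sketch below (grid values illustrative, not the wrapper's actual ones):

# Hypothetical sketch of the random-forest tuning behind rf_binomial()
library(caret)
rf_sketch <- train(Group ~ ., data = filt_ileum_uni_data,
                   method = "ranger",
                   tuneGrid = expand.grid(mtry = c(5, 60, 250),
                                          splitrule = "gini",
                                          min.node.size = c(2, 5)),
                   trControl = trainControl(method = "cv", number = 5,
                                            classProbs = TRUE,
                                            summaryFunction = twoClassSummary),
                   metric = "ROC")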

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "325"      
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.5481132"
auc_optimism_corrected_CIL      "0.4600454"
auc_optimism_corrected_CIU      "0.5999195"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6499321"
accuracy_optimism_corrected_CIL "0.6094981"
accuracy_optimism_corrected_CIU "0.7139803"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "253"      
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.4299593"
auc_optimism_corrected_CIL      "0.374039" 
auc_optimism_corrected_CIU      "0.5151263"
accuracy                        "1"        
accuracy_optimism_corrected     "0.5824311"
accuracy_optimism_corrected_CIL "0.5220418"
accuracy_optimism_corrected_CIU "0.6492387"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "11"       
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.5408093"
auc_optimism_corrected_CIL      "0.3967742"
auc_optimism_corrected_CIU      "0.6373702"
accuracy                        "1"        
accuracy_optimism_corrected     "0.5208998"
accuracy_optimism_corrected_CIL "0.3946011"
accuracy_optimism_corrected_CIU "0.6094602"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "43"       
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.4757971"
auc_optimism_corrected_CIL      "0.4098952"
auc_optimism_corrected_CIU      "0.5839277"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6103743"
accuracy_optimism_corrected_CIL "0.5583919"
accuracy_optimism_corrected_CIU "0.664693" 
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "5"        
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.5222253"
auc_optimism_corrected_CIL      "0.4556092"
auc_optimism_corrected_CIU      "0.6308566"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6078591"
accuracy_optimism_corrected_CIL "0.5208554"
accuracy_optimism_corrected_CIU "0.6677322"
roc_c

Gradient boosting
model="gb"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.
Warning: missing values found in aggregated results
[verbose gbm iteration traces from the resampling runs omitted; each final fit used 100 trees]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 3.0000000
shrinkage                         0.1000000
n.minobsinnode                   10.0000000
auc                               1.0000000
auc_optimism_corrected            0.4688164
auc_optimism_corrected_CIL        0.3601462
auc_optimism_corrected_CIU        0.5439275
accuracy                          1.0000000
accuracy_optimism_corrected       0.4723681
accuracy_optimism_corrected_CIL   0.3898390
accuracy_optimism_corrected_CIU   0.5189217
roc_c
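
Likewise, n.trees / interaction.depth / shrinkage / n.minobsinnode is caret's tuning grid for method = "gbm", and the iteration traces printed during fitting are gbm's own training log; passing verbose = FALSE would silence them. A sketch of the presumed core (grid values illustrative):

# Hypothetical sketch of the boosting tuning behind gbm_binomial()
library(caret)
gbm_sketch <- train(Group ~ ., data = filt_ileum_uni_data,
                    method = "gbm",
                    tuneGrid = expand.grid(n.trees = c(100, 200, 500),
                                           interaction.depth = 1:3,
                                           shrinkage = 0.1,
                                           n.minobsinnode = 10),
                    trControl = trainControl(method = "cv", number = 5,
                                             classProbs = TRUE,
                                             summaryFunction = twoClassSummary),
                    metric = "ROC",
                    verbose = FALSE)           # suppress gbm's iteration traces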

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)
[gbm verbose training log omitted: per-iteration TrainDeviance traces for the bootstrap refits, each ending with "Using 100 trees..."]
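The per-iteration traces summarised above come from gbm fitting verbosely inside the resampling loop. If gbm_binomial() does not expose a verbosity switch, the printout can be silenced at the call site (or with the chunk option results='hide' in R Markdown); quiet_fit below is a hypothetical helper:

# Capture and discard stdout from a verbose fit; warnings still surface.
quiet_fit <- function(expr) {
  invisible(capture.output(res <- expr))
  res
}
# gbm_model <- quiet_fit(gbm_binomial(filt_ileum_uni_data, ...))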
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   30.0000000
auc                               0.9924431
auc_optimism_corrected            0.5406939
auc_optimism_corrected_CIL        0.4727791
auc_optimism_corrected_CIU        0.6574737
accuracy                          0.9567308
accuracy_optimism_corrected       0.6216785
accuracy_optimism_corrected_CIL   0.5529762
accuracy_optimism_corrected_CIU   0.6873889
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)
[gbm verbose training log omitted: per-iteration TrainDeviance traces for the bootstrap refits, each ending with "Using 100 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 3.0000000
shrinkage                         0.1000000
n.minobsinnode                   10.0000000
auc                               1.0000000
auc_optimism_corrected            0.4883825
auc_optimism_corrected_CIL        0.4050371
auc_optimism_corrected_CIU        0.5638409
accuracy                          1.0000000
accuracy_optimism_corrected       0.5539314
accuracy_optimism_corrected_CIL   0.4967160
accuracy_optimism_corrected_CIU   0.6098763
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
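aggregate_taxa() (from custom_functions.R) presumably sums ASV counts that share a genus assignment, yielding the genus-level table the models below are refit on. A minimal sketch of that operation (the samples-x-ASVs orientation and the Genus column name are assumptions):

# Sum ASV counts within each genus label.
aggregate_by_genus <- function(asv_counts, taxa_tab) {
  stopifnot(ncol(asv_counts) == nrow(taxa_tab))
  # rowsum() sums rows by group, so transpose to ASVs-x-samples and back
  t(rowsum(t(as.matrix(asv_counts)), group = taxa_tab$Genus))
}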

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.
Warning: missing values found in aggregated results
[gbm verbose training log omitted: per-iteration TrainDeviance traces for the bootstrap refits, each ending with "Using 100 trees..."; the resampling warnings above were repeated before each refit]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   10.0000000
auc                               1.0000000
auc_optimism_corrected            0.5219912
auc_optimism_corrected_CIL        0.4425772
auc_optimism_corrected_CIU        0.6077597
accuracy                          1.0000000
accuracy_optimism_corrected       0.5170581
accuracy_optimism_corrected_CIL   0.4515944
accuracy_optimism_corrected_CIU   0.5991346
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)
[verbose gbm deviance traces omitted: one "Iter / TrainDeviance / ValidDeviance / StepSize / Improve" table per atypboot refit; every fit was subsequently scored with "Using 100 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 3.0000000
shrinkage                         0.1000000
n.minobsinnode                   30.0000000
auc                               0.9919255
auc_optimism_corrected            0.5826131
auc_optimism_corrected_CIL        0.5518379
auc_optimism_corrected_CIU        0.6214075
accuracy                          0.9326923
accuracy_optimism_corrected       0.6621439
accuracy_optimism_corrected_CIL   0.6152162
accuracy_optimism_corrected_CIU   0.7468750
roc_c
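
The apparent AUC (0.99) drops to 0.58 once optimism-corrected, the classic signature of overfitting on a small cohort. For reference, the sketch below shows the generic bootstrap-optimism correction that this kind of estimate is based on; fit_fun, perf_fun and B are illustrative names, not the actual internals of gbm_binomial or its "atypboot" resampling.

# Illustrative bootstrap optimism correction (a sketch, not the real code)
optimism_corrected <- function(data, fit_fun, perf_fun, B = 100) {
  apparent <- perf_fun(fit_fun(data), data)   # performance on the full data
  optimism <- replicate(B, {
    boot <- data[sample(nrow(data), replace = TRUE), ]
    m    <- fit_fun(boot)
    perf_fun(m, boot) - perf_fun(m, data)     # bootstrap minus original-sample
  })
  apparent - mean(optimism)                   # shrink the apparent estimate
}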

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)
[verbose gbm deviance traces omitted, as above; all refits were again scored with "Using 100 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 1.0000000
shrinkage                         0.1000000
n.minobsinnode                   30.0000000
auc                               0.9542386
auc_optimism_corrected            0.4121876
auc_optimism_corrected_CIL        0.3367588
auc_optimism_corrected_CIU        0.5335903
accuracy                          0.8388626
accuracy_optimism_corrected       0.4915539
accuracy_optimism_corrected_CIL   0.3776215
accuracy_optimism_corrected_CIU   0.5788824
roc_c
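
The gbm summaries tune exactly the four parameters reported above (n.trees, interaction.depth, shrinkage, n.minobsinnode), which is caret's gbm grid. A hypothetical call of that shape is sketched below; it is an assumption about what gbm_binomial wraps (including that filt_ileum_uni_data holds a data frame with the Group outcome), and verbose = FALSE is how the deviance traces above would be silenced.

# Assumed caret shape for the gbm fits (not the actual gbm_binomial code)
library(caret)
gbm_fit <- train(Group ~ ., data = filt_ileum_uni_data,
                 method = "gbm",
                 tuneGrid = expand.grid(n.trees = c(100, 200, 500),
                                        interaction.depth = 1:3,
                                        shrinkage = 0.1,
                                        n.minobsinnode = 30),
                 trControl = trainControl(method = "boot", number = 10,
                                          classProbs = TRUE,
                                          summaryFunction = twoClassSummary),
                 metric = "ROC",
                 verbose = FALSE)   # suppresses the per-iteration deviance trace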

Relative abundances

Elastic net
ASV level
level="ASV"
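
From here on the features are relative abundances (usage="ml_ra") rather than the CLR-transformed counts used above (usage="ml_clr"). The two scalings are sketched below for a samples-by-taxa count matrix; the helper names are illustrative, and the real transforms live inside binomial_prep in custom_functions.R.

# Illustrative feature scalings (assumed semantics of the usage= argument)
to_relabund <- function(counts) sweep(counts, 1, rowSums(counts), "/")
to_clr <- function(counts, pseudo = 0.5) {
  logc <- log(counts + pseudo)           # pseudocount avoids log(0)
  sweep(logc, 1, rowMeans(logc), "-")    # centre on the per-sample log mean
}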

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                       [,1]
alpha                             0.0000000
lambda                          101.8044909
auc                               0.9737769
auc_czech                         0.9622426
auc_no                            0.9884498
auc_optimism_corrected            0.4957353
auc_optimism_corrected_CIL        0.3553380
auc_optimism_corrected_CIU        0.5967854
accuracy                          0.5174825
accuracy_czech                          NaN
accuracy_no                       0.4390244
accuracy_optimism_corrected       0.4677574
accuracy_optimism_corrected_CIL   0.3763605
accuracy_optimism_corrected_CIU   0.5596841
roc_c
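
Note that the tuned alpha = 0 is the pure ridge end of the elastic-net grid, so the model shrinks all ASV coefficients rather than selecting a sparse subset. For reference, a direct glmnet call at these hyperparameters would look as follows, assuming a numeric feature matrix x and a two-level factor y extracted from filt_ileum_uni_data:

# Direct glmnet fit at the tuned hyperparameters (illustrative only)
library(glmnet)
ridge_fit <- glmnet(x, y, family = "binomial", alpha = 0, lambda = 101.8)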

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.8000000
lambda                          0.1250960
auc                             0.5000000
auc_czech                       0.5000000
auc_no                          0.5000000
auc_optimism_corrected          0.4593353
auc_optimism_corrected_CIL      0.4024036
auc_optimism_corrected_CIU      0.5294085
accuracy                        0.6634615
accuracy_czech                        NaN
accuracy_no                     0.6219512
accuracy_optimism_corrected     0.6254869
accuracy_optimism_corrected_CIL 0.5583482
accuracy_optimism_corrected_CIU 0.7086390
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.6000000
lambda                          0.1685547
auc                             0.5449672
auc_czech                       0.5635101
auc_no                          0.5139860
auc_optimism_corrected          0.5309733
auc_optimism_corrected_CIL      0.4496319
auc_optimism_corrected_CIU      0.6138510
accuracy                        0.6540284
accuracy_czech                        NaN
accuracy_no                     0.5416667
accuracy_optimism_corrected     0.6226971
accuracy_optimism_corrected_CIL 0.5198351
accuracy_optimism_corrected_CIU 0.7231612
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
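
aggregate_taxa (a helper from custom_functions.R) collapses the ASV table to one column per genus. A minimal sketch of the counting step, assuming samples in rows, ASVs in columns, and ASV ids as the taxa-table rownames:

# Minimal genus-aggregation sketch (aggregate_taxa also rebuilds the taxa table)
aggregate_by_genus <- function(asv_tab, taxa_tab) {
  genus <- taxa_tab[colnames(asv_tab), "genus"]    # genus label per ASV
  t(rowsum(t(as.matrix(asv_tab)), group = genus))  # sum counts within genera
}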

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          94.1638485
auc                              0.9164384
auc_czech                        0.9383117
auc_no                           0.8982143
auc_optimism_corrected           0.4387094
auc_optimism_corrected_CIL       0.3321085
auc_optimism_corrected_CIU       0.5234336
accuracy                         0.5104895
accuracy_czech                         NaN
accuracy_no                      0.4878049
accuracy_optimism_corrected      0.4098286
accuracy_optimism_corrected_CIL  0.3319182
accuracy_optimism_corrected_CIU  0.4639881
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.8000000
lambda                          0.1280950
auc                             0.5000000
auc_czech                       0.5000000
auc_no                          0.5000000
auc_optimism_corrected          0.4931307
auc_optimism_corrected_CIL      0.4189474
auc_optimism_corrected_CIU      0.6465378
accuracy                        0.6634615
accuracy_czech                        NaN
accuracy_no                     0.6219512
accuracy_optimism_corrected     0.6227011
accuracy_optimism_corrected_CIL 0.5655556
accuracy_optimism_corrected_CIU 0.6749188
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          82.0231117
auc                              0.9085765
auc_czech                        0.9186869
auc_no                           0.8749029
auc_optimism_corrected           0.5433459
auc_optimism_corrected_CIL       0.4667255
auc_optimism_corrected_CIU       0.6218420
accuracy                         0.6540284
accuracy_czech                         NaN
accuracy_no                      0.5416667
accuracy_optimism_corrected      0.6312833
accuracy_optimism_corrected_CIL  0.5724764
accuracy_optimism_corrected_CIU  0.6932383
roc_c

kNN
ASV level
level="ASV"
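
knn_binomial reports a single tuned k per comparison (k = 20-30 in the summaries below). A hypothetical caret call of the same shape, assumed rather than taken from custom_functions.R:

# Assumed caret shape for the kNN fits (not the actual knn_binomial code)
library(caret)
knn_fit <- train(Group ~ ., data = filt_ileum_uni_data,
                 method = "knn",
                 tuneGrid = data.frame(k = seq(2, 30, by = 2)),
                 trControl = trainControl(method = "boot", number = 10,
                                          classProbs = TRUE,
                                          summaryFunction = twoClassSummary),
                 metric = "ROC")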

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               28.0000000
auc                              0.6268102
auc_optimism_corrected           0.4980865
auc_optimism_corrected_CIL       0.3825790
auc_optimism_corrected_CIU       0.6788756
accuracy                         0.4965035
accuracy_optimism_corrected      0.4908856
accuracy_optimism_corrected_CIL  0.3876451
accuracy_optimism_corrected_CIU  0.5826531
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.6004141
auc_optimism_corrected           0.4773470
auc_optimism_corrected_CIL       0.4016268
auc_optimism_corrected_CIU       0.5291267
accuracy                         0.6394231
accuracy_optimism_corrected      0.5723595
accuracy_optimism_corrected_CIL  0.4473894
accuracy_optimism_corrected_CIU  0.6494254
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.5622891
auc_optimism_corrected           0.4896604
auc_optimism_corrected_CIL       0.4452667
auc_optimism_corrected_CIU       0.5331828
accuracy                         0.6445498
accuracy_optimism_corrected      0.5767476
accuracy_optimism_corrected_CIL  0.4900313
accuracy_optimism_corrected_CIU  0.6574726
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               28.0000000
auc                              0.5368885
auc_optimism_corrected           0.4642119
auc_optimism_corrected_CIL       0.3174013
auc_optimism_corrected_CIU       0.6018534
accuracy                         0.5384615
accuracy_optimism_corrected      0.4715343
accuracy_optimism_corrected_CIL  0.3693182
accuracy_optimism_corrected_CIU  0.5633929
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.6008282
auc_optimism_corrected           0.5078505
auc_optimism_corrected_CIL       0.4423814
auc_optimism_corrected_CIU       0.5495103
accuracy                         0.6682692
accuracy_optimism_corrected      0.6368878
accuracy_optimism_corrected_CIL  0.5878571
accuracy_optimism_corrected_CIU  0.6714029
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               20.0000000
auc                              0.6211535
auc_optimism_corrected           0.5026502
auc_optimism_corrected_CIL       0.4369054
auc_optimism_corrected_CIU       0.6005189
accuracy                         0.6824645
accuracy_optimism_corrected      0.5846801
accuracy_optimism_corrected_CIL  0.4887763
accuracy_optimism_corrected_CIU  0.6809631
roc_c

Random Forest
ASV level
level="ASV"
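
The mtry, splitrule and min.node.size rows in the summaries below are ranger tuning parameters, so rf_binomial presumably wraps caret's ranger method. A sketch under that assumption:

# Assumed caret/ranger shape for the random-forest fits (not rf_binomial itself)
library(caret)
rf_fit <- train(Group ~ ., data = filt_ileum_uni_data,
                method = "ranger",
                tuneLength = 5,   # tunes mtry, splitrule and min.node.size
                trControl = trainControl(method = "boot", number = 10,
                                         classProbs = TRUE,
                                         summaryFunction = twoClassSummary),
                metric = "ROC")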

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "1"        
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.5669823"
auc_optimism_corrected_CIL      "0.4528649"
auc_optimism_corrected_CIU      "0.6767989"
accuracy                        "0.986014" 
accuracy_optimism_corrected     "0.5038023"
accuracy_optimism_corrected_CIL "0.4194299"
accuracy_optimism_corrected_CIU "0.6194728"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "53"       
splitrule                       "gini"     
min.node.size                   "5"        
auc                             "1"        
auc_optimism_corrected          "0.5767144"
auc_optimism_corrected_CIL      "0.4718909"
auc_optimism_corrected_CIU      "0.6804065"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6315153"
accuracy_optimism_corrected_CIL "0.5730885"
accuracy_optimism_corrected_CIU "0.6743635"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome = "Group",
                        N = 10,
                        reuse = FALSE,
                        file = model_name,
                        Q = "Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "1"        
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.4144108"
auc_optimism_corrected_CIL      "0.3644445"
auc_optimism_corrected_CIU      "0.4922166"
accuracy                        "0.9478673"
accuracy_optimism_corrected     "0.6377866"
accuracy_optimism_corrected_CIL "0.580098" 
accuracy_optimism_corrected_CIU "0.7365317"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
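aggregate_taxa() (from custom_functions.R) collapses all ASVs sharing a genus assignment into a single feature. A minimal sketch of that idea, assuming samples in rows, ASVs in columns, and a taxa table whose rownames are ASV ids with a genus column (all names hypothetical):

# Sum ASV columns that share a genus label; assumes every ASV
# has a non-missing genus assignment.
aggregate_to_genus <- function(asv_tab, taxa_tab) {
  genus <- taxa_tab$genus[match(colnames(asv_tab), rownames(taxa_tab))]
  t(rowsum(t(asv_tab), group = genus))  # samples x genera
}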

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome = "Group",
                        N = 10,
                        reuse = FALSE,
                        file = model_name,
                        Q = "Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "117"      
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.4948943"
auc_optimism_corrected_CIL      "0.4067949"
auc_optimism_corrected_CIU      "0.5945465"
accuracy                        "1"        
accuracy_optimism_corrected     "0.4732102"
accuracy_optimism_corrected_CIL "0.3877458"
accuracy_optimism_corrected_CIU "0.5686154"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome = "Group",
                        N = 10,
                        reuse = FALSE,
                        file = model_name,
                        Q = "Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "1"        
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.4660819"
auc_optimism_corrected_CIL      "0.3747545"
auc_optimism_corrected_CIU      "0.5351771"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6339685"
accuracy_optimism_corrected_CIL "0.5722706"
accuracy_optimism_corrected_CIU "0.690873" 
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                        sample_method = "atypboot",
                        outcome = "Group",
                        N = 10,
                        reuse = FALSE,
                        file = model_name,
                        Q = "Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "5"        
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.5034925"
auc_optimism_corrected_CIL      "0.4477087"
auc_optimism_corrected_CIU      "0.5660385"
accuracy                        "1"        
accuracy_optimism_corrected     "0.6141505"
accuracy_optimism_corrected_CIL "0.5210274"
accuracy_optimism_corrected_CIU "0.6779868"
roc_c
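Across all three comparisons, the optimism-corrected AUCs hover around 0.5 at both ASV and genus level (post_ltx vs healthy at ASV level even falls below it), so the perfect apparent AUCs reflect overfitting rather than reproducible discriminative signal in the terminal ileum.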

Gradient boosting
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
[verbose gbm output omitted: per-iteration TrainDeviance tables for the bootstrap refits, each ending in "Using 100 trees..."; caret repeatedly warned "There were missing values in resampled performance measures" / "missing values found in aggregated results" during tuning]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   20.0000000
auc                               0.9972603
auc_optimism_corrected            0.4789823
auc_optimism_corrected_CIL        0.3795586
auc_optimism_corrected_CIU        0.5960000
accuracy                          0.9650350
accuracy_optimism_corrected       0.4911041
accuracy_optimism_corrected_CIL   0.3543776
accuracy_optimism_corrected_CIU   0.6332143
roc_c
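The tuned parameters n.trees, interaction.depth, shrinkage and n.minobsinnode are the four gbm hyperparameters caret searches over, suggesting gbm_binomial() wraps caret::train(method = "gbm"); the per-iteration deviance tables omitted above are gbm's verbose training log from the resampled refits. A minimal sketch of such a wrapper (names hypothetical), passing verbose = FALSE to silence that log:

library(caret)

# Bootstrap-resampled gbm for a two-level Group factor; a sketch only,
# not the actual gbm_binomial() from custom_functions.R.
fit_gbm <- function(df, N = 10) {
  train(Group ~ ., data = df,
        method = "gbm",
        metric = "ROC",
        trControl = trainControl(method = "boot", number = N,
                                 classProbs = TRUE,
                                 summaryFunction = twoClassSummary),
        verbose = FALSE)  # suppress the per-iteration deviance log
}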

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
[verbose gbm output omitted: per-iteration TrainDeviance tables for the bootstrap refits, each ending in "Using 200 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         200.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   20.0000000
auc                               1.0000000
auc_optimism_corrected            0.4843010
auc_optimism_corrected_CIL        0.4102344
auc_optimism_corrected_CIU        0.5552723
accuracy                          1.0000000
accuracy_optimism_corrected       0.6023346
accuracy_optimism_corrected_CIL   0.5617955
accuracy_optimism_corrected_CIU   0.6582415
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
[verbose gbm output omitted: per-iteration TrainDeviance tables for the bootstrap refits, each ending in "Using 100 trees..."]
   340        0.1907             nan     0.1000   -0.0005
   360        0.1723             nan     0.1000   -0.0003
   380        0.1552             nan     0.1000   -0.0009
   400        0.1432             nan     0.1000   -0.0004
   420        0.1338             nan     0.1000   -0.0003
   440        0.1230             nan     0.1000   -0.0003
   460        0.1138             nan     0.1000   -0.0011
   480        0.1041             nan     0.1000   -0.0004
   500        0.0957             nan     0.1000   -0.0001
Using 100 trees...

Using 100 trees...
Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.1992             nan     0.1000    0.0023
     2        1.1616             nan     0.1000    0.0117
     3        1.1250             nan     0.1000    0.0066
     4        1.0909             nan     0.1000   -0.0023
     5        1.0455             nan     0.1000    0.0129
     6        1.0061             nan     0.1000    0.0097
     7        0.9735             nan     0.1000    0.0064
     8        0.9347             nan     0.1000    0.0119
     9        0.9011             nan     0.1000    0.0031
    10        0.8777             nan     0.1000   -0.0027
    20        0.6627             nan     0.1000    0.0020
    40        0.4204             nan     0.1000    0.0012
    60        0.2772             nan     0.1000   -0.0021
    80        0.1891             nan     0.1000   -0.0010
   100        0.1283             nan     0.1000   -0.0001
   120        0.0914             nan     0.1000   -0.0006
   140        0.0656             nan     0.1000   -0.0004
   160        0.0483             nan     0.1000   -0.0003
   180        0.0350             nan     0.1000    0.0000
   200        0.0257             nan     0.1000   -0.0000
Using 100 trees...

Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   30.0000000
auc                               0.9786579
auc_optimism_corrected            0.5328104
auc_optimism_corrected_CIL        0.4655437
auc_optimism_corrected_CIU        0.6350140
accuracy                          0.8909953
accuracy_optimism_corrected       0.5524883
accuracy_optimism_corrected_CIL   0.5178431
accuracy_optimism_corrected_CIU   0.5937303
roc_c
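
Note the gap: the apparent AUC of 0.98 collapses to an optimism-corrected 0.53 with a confidence interval spanning 0.5, so essentially all of the apparent discrimination looks like overfitting, which is exactly what the overfitting_check = TRUE run is meant to expose.

For orientation, here is a minimal sketch of the Harrell-style optimism bootstrap that such corrected estimates are typically based on. gbm_binomial() is defined in custom_functions.R and is not shown, so this is an assumption about the general scheme rather than that helper's actual implementation; every name below is hypothetical.

library(gbm)
library(pROC)

# Hypothetical sketch: df holds a 0/1 outcome column plus numeric,
# syntactically named feature columns.
optimism_corrected_auc <- function(df, outcome = "Group", B = 10) {
  fml <- reformulate(setdiff(names(df), outcome), response = outcome)
  fit_auc <- function(train, test) {
    m <- gbm(fml, data = train, distribution = "bernoulli",
             n.trees = 100, interaction.depth = 5, shrinkage = 0.1,
             n.minobsinnode = 10, verbose = FALSE)
    p <- predict(m, newdata = test, n.trees = 100, type = "response")
    as.numeric(auc(roc(test[[outcome]], p, quiet = TRUE)))
  }
  apparent <- fit_auc(df, df)                      # AUC on the fitting data
  optimism <- replicate(B, {
    boot <- df[sample(nrow(df), replace = TRUE), ] # resample with replacement
    fit_auc(boot, boot) - fit_auc(boot, df)        # how much a refit flatters itself
  })
  apparent - mean(optimism)                        # optimism-corrected AUC
}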

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
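
aggregate_taxa() also lives in custom_functions.R. As a rough sketch of what genus-level aggregation typically does (an assumption, not that helper's actual code): counts of all ASVs sharing a genus assignment are summed, and the taxa table is collapsed to one row per genus.

# Hypothetical sketch: ASVs in rows, numeric sample columns, and taxa_tab
# aligned row-for-row with asv_tab.
aggregate_taxa_sketch <- function(asv_tab, taxa_tab, taxonomic_level = "genus") {
  stopifnot(nrow(asv_tab) == nrow(taxa_tab))
  groups <- taxa_tab[[taxonomic_level]]
  keep <- !is.na(groups)                            # drop ASVs with no genus label
  agg_tab <- rowsum(asv_tab[keep, ], groups[keep])  # sum counts per genus
  # one representative lineage per genus, aligned with agg_tab's rows
  agg_taxa <- taxa_tab[keep, ][match(rownames(agg_tab), groups[keep]), ]
  list(agg_tab, agg_taxa)
}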

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
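Note usage = "ml_ra" here, versus usage = "ml_clr" in the elastic-net models: going by the naming, binomial_prep() presumably hands the GBMs relative abundances rather than CLR-transformed counts, which matches the gbm_model_ra key used when saving the results. The transform itself is defined in custom_functions.R.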
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.
Warning: missing values found in aggregated results
[the two warnings above recur for several bootstrap refits; the per-iteration gbm training tables are condensed as before, each refit ending with "Using 100 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 1.0000000
shrinkage                         0.1000000
n.minobsinnode                   20.0000000
auc                               0.9596869
auc_optimism_corrected            0.5087091
auc_optimism_corrected_CIL        0.3924655
auc_optimism_corrected_CIU        0.6057091
accuracy                          0.9020979
accuracy_optimism_corrected       0.5142506
accuracy_optimism_corrected_CIL   0.4721088
accuracy_optimism_corrected_CIU   0.5722273
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
[verbose gbm training output condensed as before; each bootstrap refit ends with "Using 100 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         100.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   20.0000000
auc                               1.0000000
auc_optimism_corrected            0.5036906
auc_optimism_corrected_CIL        0.4093266
auc_optimism_corrected_CIU        0.5839071
accuracy                          0.9951923
accuracy_optimism_corrected       0.5946566
accuracy_optimism_corrected_CIL   0.5342840
accuracy_optimism_corrected_CIU   0.6931171
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                          sample_method = "atypboot",
                          outcome = "Group",
                          N = 10,
                          reuse = FALSE,
                          file = model_name,
                          Q = "Q1",
                          overfitting_check = TRUE)
[verbose gbm training output condensed as before; each bootstrap refit ends with "Using 200 trees..."]
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
                                       [,1]
n.trees                         200.0000000
interaction.depth                 5.0000000
shrinkage                         0.1000000
n.minobsinnode                   10.0000000
auc                               1.0000000
auc_optimism_corrected            0.5026238
auc_optimism_corrected_CIL        0.4422162
auc_optimism_corrected_CIU        0.5990286
accuracy                          1.0000000
accuracy_optimism_corrected       0.5550849
accuracy_optimism_corrected_CIL   0.4913026
accuracy_optimism_corrected_CIU   0.6206456
roc_c
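
The gap between the apparent and optimism-corrected metrics above is the point of this check: the boosted model separates its training data perfectly (AUC = 1), while the bootstrap-corrected AUC collapses to ~0.50, i.e. chance level. Optimism correction of this kind is usually done with the Harrell bootstrap; a minimal sketch of that idea, where fit_fun and predict_fun are hypothetical placeholders (the actual implementation lives in custom_functions.R):

library(pROC)

# Harrell-style optimism correction for AUC (illustrative sketch only)
optimism_corrected_auc <- function(X, y, fit_fun, predict_fun, B = 100) {
  # apparent performance: train and evaluate on the same data
  apparent <- as.numeric(auc(y, predict_fun(fit_fun(X, y), X)))
  optimism <- mean(replicate(B, {
    idx  <- sample(seq_along(y), replace = TRUE)      # bootstrap resample
    fit  <- fit_fun(X[idx, , drop = FALSE], y[idx])   # refit on the resample
    boot <- as.numeric(auc(y[idx], predict_fun(fit, X[idx, , drop = FALSE])))
    orig <- as.numeric(auc(y, predict_fun(fit, X)))   # evaluate on original data
    boot - orig                                       # optimism of this fit
  }))
  apparent - optimism
}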

Saving results

models_list <- list()

# stack each model type's per-comparison summaries into one data frame,
# giving one list element (and later one worksheet) per model type
for (model_name in names(supplements_models$models_summ)){
  df <- do.call(rbind, supplements_models$models_summ[[model_name]])
  models_list[[model_name]] <- df
}

write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)
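
Because write.xlsx (openxlsx) receives a named list, each model type becomes its own worksheet, with the comparison names preserved as row names (rowNames=TRUE).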

Data Analysis - Colon

segment="colon"

Machine learning

path = "../results/Q1/models_overfitting_check"

ElasticNet

model="enet"

ASV level

level="ASV"

pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])

pre_ltx vs post_ltx
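
Unlike the ileum models, the colon models are fitted with patient = TRUE and clust_var = "Patient", presumably so that resampling keeps repeated biopsies from the same patient together instead of treating each sample as independent.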
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          67.4876631
auc                              0.6741089
auc_czech                        0.7313255
auc_no                           0.6465953
auc_optimism_corrected           0.4821532
auc_optimism_corrected_CIL       0.4309691
auc_optimism_corrected_CIU       0.5262457
accuracy                         0.5826377
accuracy_czech                         NaN
accuracy_no                      0.5787966
accuracy_optimism_corrected      0.5552662
accuracy_optimism_corrected_CIL  0.5068982
accuracy_optimism_corrected_CIU  0.6114882
enet_model$conf_matrices
$original
    0  
0 349 0
1 250 0

$czech
    0  
0 147 0
1 103 0

$no
    0  
0 202 0
1 147 0
enet_model$plot


roc_c
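
With alpha = 0 (pure ridge) and a large tuned lambda, the coefficients are shrunk nearly to zero and the classifier defaults to the majority class: every sample falls into a single predicted-class column of the confusion matrices above, which is also why the optimism-corrected AUC sits at chance level.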

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          51.8414603
auc                              0.7602734
auc_czech                        0.7929631
auc_no                           0.7184415
auc_optimism_corrected           0.4945404
auc_optimism_corrected_CIL       0.4553473
auc_optimism_corrected_CIU       0.5558614
accuracy                         0.6843137
accuracy_czech                         NaN
accuracy_no                      0.6666667
accuracy_optimism_corrected      0.6705656
accuracy_optimism_corrected_CIL  0.6245660
accuracy_optimism_corrected_CIU  0.7041182
enet_model$conf_matrices
$original
    1  
0 161 0
1 349 0

$czech
    1  
0  92 0
1 211 0

$no
    1  
0  69 0
1 138 0
enet_model$plot


roc_c

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
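
aggregate_taxa presumably sums counts across ASVs that share a genus assignment; a minimal base-R sketch of that idea, assuming a samples x ASVs count matrix asv_counts and genus labels taxa$genus (both names hypothetical):

# sum the ASV columns that map to the same genus
genus_counts <- t(rowsum(t(asv_counts), group = taxa$genus))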

pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 135 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",overfitting_check = TRUE)


# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                           0.80000000
lambda                          0.07056436
auc                             0.50000000
auc_czech                       0.50000000
auc_no                          0.50000000
auc_optimism_corrected          0.45284404
auc_optimism_corrected_CIL      0.39551409
auc_optimism_corrected_CIU      0.49948653
accuracy                        0.60827251
accuracy_czech                         NaN
accuracy_no                     0.60218978
accuracy_optimism_corrected     0.57337784
accuracy_optimism_corrected_CIL 0.52482349
accuracy_optimism_corrected_CIU 0.62227176
enet_model$conf_matrices
$original
    1  
0 161 0
1 250 0

$czech
   1  
0 52 0
1 85 0

$no
    1  
0 109 0
1 165 0
enet_model$plot


roc_c
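
An apparent AUC of exactly 0.5 in every stratum means the fitted model returns a constant score: with alpha = 0.8 the penalty is lasso-dominated, and it has evidently zeroed out all genus-level predictors for this comparison.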

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 17 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                           0.80000000
lambda                          0.07324714
auc                             0.50000000
auc_czech                       0.50000000
auc_no                          0.50000000
auc_optimism_corrected          0.49333832
auc_optimism_corrected_CIL      0.47331491
auc_optimism_corrected_CIU      0.52415390
accuracy                        0.58333333
accuracy_czech                         NaN
accuracy_no                     0.56446991
accuracy_optimism_corrected     0.56083992
accuracy_optimism_corrected_CIL 0.49608392
accuracy_optimism_corrected_CIU 0.60465613
enet_model$conf_matrices
$original
    0  
0 350 0
1 250 0

$czech
    0  
0 153 0
1  98 0

$no
    0  
0 197 0
1 152 0
enet_model$plot


roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 71 ASV(s)
Removing 5 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)
Warning: from glmnet C++ code (error code -100); Convergence for 100th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned
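# Note: glmnet's warning concerns only the smallest lambda values at the end
# of the regularisation path; the tuned model below uses a large lambda
# (~62.4), which comes from the converged part of the path.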
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                            0.0000000
lambda                          62.4494689
auc                              0.7251109
auc_czech                        0.7409982
auc_no                           0.7004115
auc_optimism_corrected           0.4714759
auc_optimism_corrected_CIL       0.4071017
auc_optimism_corrected_CIU       0.5395775
accuracy                         0.6849315
accuracy_czech                         NaN
accuracy_no                      0.6521739
accuracy_optimism_corrected      0.6708665
accuracy_optimism_corrected_CIL  0.6086170
accuracy_optimism_corrected_CIU  0.7311268
enet_model$conf_matrices
$original
    1  
0 161 0
1 350 0

$czech
    1  
0  89 0
1 215 0

$no
    1  
0  72 0
1 135 0
enet_model$plot

roc_c

Saving results

# keep only this segment's comparisons and stack their summaries
models_summ_df_colon <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_colon,file.path(path,paste0("elastic_net_",segment,".csv")))

Supplementary models

CLR-transformed data
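
The models in this block run on centred log-ratio (CLR) transformed counts (usage="ml_clr" in binomial_prep). One useful property: Euclidean distance between CLR vectors equals the Aitchison distance between the original compositions, so distance-based learners such as kNN operate on a composition-aware geometry. A minimal sketch of the transform, assuming a samples x taxa count matrix and a pseudocount to avoid log(0); binomial_prep presumably does the equivalent internally:

clr <- function(counts, pseudo = 1) {
  logx <- log(counts + pseudo)           # log counts with a pseudocount
  sweep(logx, 1, rowMeans(logx), "-")    # centre each sample (row) by its mean
}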

kNN

model="knn"

ASV level

level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               24.0000000
auc                              0.6464814
auc_optimism_corrected           0.4869012
auc_optimism_corrected_CIL       0.4479849
auc_optimism_corrected_CIU       0.5201409
accuracy                         0.5993322
accuracy_optimism_corrected      0.5568528
accuracy_optimism_corrected_CIL  0.5195593
accuracy_optimism_corrected_CIU  0.5805193
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               27.0000000
auc                              0.5670594
auc_optimism_corrected           0.4409962
auc_optimism_corrected_CIL       0.3668215
auc_optimism_corrected_CIU       0.5009250
accuracy                         0.6882353
accuracy_optimism_corrected      0.6590241
accuracy_optimism_corrected_CIL  0.6378461
accuracy_optimism_corrected_CIU  0.6852319
roc_c

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 135 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               29.0000000
auc                              0.6010435
auc_optimism_corrected           0.4871584
auc_optimism_corrected_CIL       0.4329184
auc_optimism_corrected_CIU       0.5517731
accuracy                         0.6107056
accuracy_optimism_corrected      0.5527559
accuracy_optimism_corrected_CIL  0.5087752
accuracy_optimism_corrected_CIU  0.6054608
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 17 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.6043829
auc_optimism_corrected           0.5211546
auc_optimism_corrected_CIL       0.4650463
auc_optimism_corrected_CIU       0.5706488
accuracy                         0.5883333
accuracy_optimism_corrected      0.5710544
accuracy_optimism_corrected_CIL  0.5116544
accuracy_optimism_corrected_CIU  0.6208351
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
Removing 71 ASV(s)
Removing 5 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
                                      [,1]
k                               30.0000000
auc                              0.6109583
auc_optimism_corrected           0.4960179
auc_optimism_corrected_CIL       0.4322515
auc_optimism_corrected_CIU       0.5666104
accuracy                         0.6868885
accuracy_optimism_corrected      0.6426930
accuracy_optimism_corrected_CIL  0.5991299
accuracy_optimism_corrected_CIU  0.6791946
roc_c
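
Taken together, the colon kNN models mirror the elastic net: the optimism-corrected AUCs stay in the 0.44-0.52 range, and the higher apparent accuracies (e.g. ~0.69 for post_ltx vs healthy) largely track the majority-class share (~0.69 for that comparison) rather than real discrimination.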

Random Forest

model="rf"

ASV level

level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 2138 ASV(s)
Removing 70 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1")

# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name,  :
  Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
                                [,1]       
mtry                            "143"      
splitrule                       "gini"     
min.node.size                   "2"        
auc                             "1"        
auc_optimism_corrected          "0.9599069"
auc_optimism_corrected_CIL      "0.9178946"
auc_optimism_corrected_CIU      "0.9905348"
accuracy                        "1"        
accuracy_optimism_corrected     "0.8984082"
accuracy_optimism_corrected_CIL "0.8416505"
accuracy_optimism_corrected_CIU "0.9411982"
roc_c
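
Unlike the elastic net and kNN on the same comparison, the random forest keeps most of its discrimination after optimism correction (corrected AUC ~0.96 for pre_ltx vs post_ltx), suggesting signal that is non-linear or spread across many ASVs; mtry = 143 is the number of candidate ASVs sampled at each split.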

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1")

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Gradient boosting

ASV level

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Relative abundances

Elastic net

ASV level

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

kNN

ASV level

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Random Forest

ASV level

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Gradient boosting

ASV level

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Genus level

Aggregate taxa

pre_ltx vs healthy

pre_ltx vs post_ltx

post_ltx vs healthy

Saving results

---
title: "ML models check"
output: html_notebook
---

```{r}
source("custom_functions.R")
```

# Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway
samples.

**Czech**

```{r}
path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))
```

**Norway**

```{r}
path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))
```

# Merging

**TO DO: STATISTICS OF READS**

Merging two countries based on the different matrices - Ileum, Colon.

**Terminal ileum**

```{r}
ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="Q1")

ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]
```

**Colon**

```{r}
colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="Q1")

colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]
```

# Data Analysis - Terminal ileum

```{r}
segment="terminal_ileum"
```

## Machine learning

```{r}
path = "../results/Q1/models_overfitting_check"
```

### ElasticNet

```{r}
model="enet"
```

#### ASV level

```{r}
level="ASV"
```

##### pre_ltx vs healthy

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()

models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot
roc_c
```

##### pre_ltx vs post_ltx

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### post_ltx vs healthy

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

#### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

##### pre_ltx vs healthy

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### pre_ltx vs post_ltx

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### post_ltx vs healthy

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,group, 
                                     usage="ml_clr")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot
roc_c
```

#### Saving results

```{r}
models_summ_df_ileum <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_ileum,file.path(path,paste0("elastic_net_",segment,".csv")))
```
### Supplementary models

```{r}
supplements_models <- list()
```

#### CLR-transformed data

##### kNN

```{r}
model="knn"
```

###### ASV level

```{r}
level="ASV"
```

***pre_ltx vs healthy***

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

##### Random Forest

```{r}
model="rf"
```

###### ASV level

```{r}
level="ASV"
```

***pre_ltx vs healthy***

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

##### Gradient boosting

```{r}
model="gb"
```

###### ASV level

```{r}
level="ASV"
```

***pre_ltx vs healthy***

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```
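
A note for orientation: genus-level aggregation is assumed here to sum the counts of all ASVs assigned to the same genus (the actual logic lives in `aggregate_taxa()` in custom_functions.R). A toy, non-evaluated sketch of that operation, with made-up ASVs and genus assignments:

```{r eval=FALSE}
# Toy illustration only: sum ASV counts within each genus with rowsum().
toy_counts <- matrix(c(5, 0, 2,
                       1, 3, 0,
                       4, 4, 4),
                     nrow = 3, byrow = TRUE,
                     dimnames = list(c("ASV1", "ASV2", "ASV3"),
                                     c("s1", "s2", "s3")))
toy_genus <- c(ASV1 = "Blautia", ASV2 = "Blautia", ASV3 = "Roseburia")
rowsum(toy_counts, group = toy_genus)  # one row per genus
```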

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```


#### Relative abundances
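
From here on, `binomial_prep()` is called with `usage="ml_ra"` instead of `usage="ml_clr"`, i.e. the models are refit on relative abundances rather than CLR-transformed counts. A minimal, non-evaluated sketch of the two transforms, assuming the usual definitions (the exact implementation is in custom_functions.R):

```{r eval=FALSE}
# Toy count vector for one sample (made up; zero counts would need a
# pseudocount before the log in the CLR case).
counts <- c(ASV1 = 120, ASV2 = 30, ASV3 = 1, ASV4 = 849)

# Relative abundances: per-sample proportions.
rel_ab <- counts / sum(counts)

# CLR: log-ratio of each feature to the geometric mean of the sample.
clr <- log(counts) - mean(log(counts))
```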

##### Elastic net

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

##### kNN

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

##### Random Forest

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
roc_c
```

##### Gradient boosting

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")

# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

#### Saving results

```{r}
models_list <- list()

for (model_name in names(supplements_models$models_summ)){
  df <- do.call(rbind, supplements_models$models_summ[[model_name]])
  models_list[[model_name]] <- df
}

write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)

```
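
Optionally, the k-fold ROC objects can be persisted as well, so curves can be re-plotted later without refitting. This is a sketch, not part of the original pipeline, and the file name is an assumption:

```{r eval=FALSE}
# Persist the ROC objects next to the summary workbook (optional).
saveRDS(supplements_models$roc_cs,
        file = file.path(path, paste0("supplements_rocobjs_", segment, ".rds")))
```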

# Data Analysis - Colon

```{r}
segment="colon"
```

## Machine learning

```{r}
path = "../results/Q1/models_overfitting_check"
```
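
If the notebook is run from scratch, it may be worth making sure this output directory exists before any model writes to it (a no-op if it already does):

```{r eval=FALSE}
# Create the results directory if missing.
dir.create(path, recursive = TRUE, showWarnings = FALSE)
```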

### ElasticNet

```{r}
model="enet"
```

#### ASV level

```{r}
level="ASV"
```

##### pre_ltx vs healthy

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot
roc_c
```
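
Unlike the ileum models, the colon models pass `patient = TRUE` and `clust_var="Patient"`: colon biopsies include repeated samples per patient, so resampling has to keep all of a patient's samples together, otherwise information leaks between resamples. A minimal illustration of patient-level bootstrapping (illustrative only; `glmnet_binomial()` handles this internally via `clust_var`, and the `Patient` column layout is an assumption):

```{r eval=FALSE}
# Draw patients with replacement, then take all rows of each drawn patient.
boot_by_patient <- function(metadata, id_col = "Patient") {
  ids <- unique(metadata[[id_col]])
  drawn <- sample(ids, length(ids), replace = TRUE)
  unlist(lapply(drawn, function(p) which(metadata[[id_col]] == p)))
}
```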

##### pre_ltx vs post_ltx

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### post_ltx vs healthy

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

#### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

##### pre_ltx vs healthy

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",overfitting_check = TRUE)


# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### pre_ltx vs post_ltx

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot

roc_c
```

##### post_ltx vs healthy

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
enet_model$conf_matrices
enet_model$plot
roc_c
```

#### Saving results

```{r}
# keep only this segment's rows: models_summ also holds the ileum entries,
# and the model names embed the segment, so grep on the segment substring
models_summ_df_colon <- do.call(rbind,
  models_summ[grep(segment, names(models_summ), value = TRUE)])

write.csv(models_summ_df_colon,file.path(path,paste0("elastic_net_",segment,".csv")))
```

### Supplementary models

#### CLR-transformed data

##### kNN

```{r}
model="knn"
```
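
One reason kNN is a reasonable companion model on the CLR features: Euclidean distance between CLR-transformed samples equals the Aitchison distance, so the neighbourhoods respect the compositional geometry of the data. A toy check (not evaluated):

```{r eval=FALSE}
# Aitchison distance between two toy compositions via their CLR coordinates.
clr_vec <- function(x) log(x) - mean(log(x))
s1 <- c(10, 20, 70)
s2 <- c(5, 40, 55)
sqrt(sum((clr_vec(s1) - clr_vec(s2))^2))
```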

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
roc_c
```

##### Random Forest

```{r}
model="rf"
```

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

##### Gradient boosting

```{r}
model="gb"
```

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```


#### Relative abundances
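
The comparisons below are refitted on relative abundances (`usage="ml_ra"`) instead of the CLR-transformed counts used so far (`usage="ml_clr"`). The two feature transforms roughly correspond to the sketch below; illustrative only, since `binomial_prep()` also filters taxa and its exact transform (e.g. pseudocount handling) may differ:

```{r eval=FALSE}
# counts: numeric matrix, samples as rows, taxa as columns (assumption)
rel_abund <- function(counts) {
  sweep(counts, 1, rowSums(counts), "/")   # per-sample proportions
}
clr_transform <- function(counts, pseudo = 0.5) {
  logx <- log(counts + pseudo)             # pseudocount avoids log(0)
  sweep(logx, 1, rowMeans(logx), "-")      # centre on the geometric mean
}
```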

##### Elastic net

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=FALSE,
                              file=model_name,
                              Q="Q1",
                              overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
roc_c
```

##### kNN

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(knn_model, group)

# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
roc_c
```

##### Random forest

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(rf_model, group)

# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
roc_c
```

##### Gradient boosting

###### ASV level

```{r}
level="ASV"
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

###### Genus level

```{r}
level="genus"
```

Aggregate taxa

```{r}
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
```

**pre_ltx vs healthy**

```{r}
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**pre_ltx vs post_ltx**

```{r}
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

**post_ltx vs healthy**

```{r}
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
```

```{r}
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)

# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                          sample_method = "atypboot",
                          outcome="Group",
                          N=10,
                          clust_var="Patient",
                          reuse=FALSE,
                          file=model_name,
                          Q="Q1",
                          overfitting_check = TRUE)

# ROC curve
roc_c <- roc_curve(gbm_model, group)

# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
roc_c
```

#### Saving results

```{r}
models_list <- list()

# one combined summary data frame per model type (rows = comparisons)
for (model_type in names(supplements_models$models_summ)){
  models_list[[model_type]] <- do.call(rbind, supplements_models$models_summ[[model_type]])
}

# one sheet per model type
write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)

```
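
Assuming `write.xlsx()` above comes from `openxlsx` (the source of the `rowNames` argument used here), each element of the named list becomes its own sheet, named after the model type. A quick way to spot-check a sheet afterwards (sheet names follow the list names, e.g. `rf_model`):

```{r eval=FALSE}
# read one model type's summary back from the workbook written above
rf_summ <- openxlsx::read.xlsx(
  file.path(path, paste0("supplements_models_", segment, ".xlsx")),
  sheet = "rf_model", rowNames = TRUE)
head(rf_summ)
```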

